{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!--\n",
    "#\n",
    "# Licensed to the Apache Software Foundation (ASF) under one or more\n",
    "# contributor license agreements.  See the NOTICE file distributed with\n",
    "# this work for additional information regarding copyright ownership.\n",
    "# The ASF licenses this file to You under the Apache License, Version 2.0\n",
    "# (the \"License\"); you may not use this file except in compliance with\n",
    "# the License.  You may obtain a copy of the License at\n",
    "#\n",
    "#    http://www.apache.org/licenses/LICENSE-2.0\n",
    "#\n",
    "# Unless required by applicable law or agreed to in writing, software\n",
    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "# See the License for the specific language governing permissions and\n",
    "# limitations under the License.\n",
    "#\n",
    "-->\n",
    "\n",
    "# Precommit Job Times\n",
    "This notebook fetches test statistics from Jenkins.\n",
    "\n",
    "## Requirements\n",
    "\n",
    "```shell\n",
    "pip install pandas matplotlib requests\n",
    "# You may need to restart Jupyter for matplotlib to work.\n",
    "```\n",
    "\n",
    "**Note:** Requests to `ci-beam.apache.org` must contain a ?depth= or ?tree= argument, otherwise your IP will get banned. [Policy](https://cwiki.apache.org/confluence/display/INFRA/Using+the+ASF+Jenkins+API)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.dates as md\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch precommit job data from Jenkins.\n",
    "\n",
    "class Build(dict):\n",
    "    def __init__(self, job_name, json):\n",
    "        self['job_name'] = job_name\n",
    "        self['result'] = json['result']\n",
    "        self['number'] = json['number']\n",
    "        self['timestamp'] = pd.Timestamp.utcfromtimestamp(json['timestamp'] / 1000)\n",
    "        self['queuingDurationMillis'] = -1\n",
    "        self['totalDurationMillis'] = -1\n",
    "        for action in json['actions']:\n",
    "            if action.get('_class', None) == 'jenkins.metrics.impl.TimeInQueueAction':\n",
    "                self['queuingDurationMinutes'] = action['queuingDurationMillis'] / 60000.\n",
    "                self['totalDurationMinutes'] = action['totalDurationMillis'] / 60000.\n",
    "        if self['queuingDurationMinutes'] == -1:\n",
    "            raise ValueError('could not find queuingDurationMillis in: %s', json)\n",
    "        if self['totalDurationMinutes'] == -1:\n",
    "            raise ValueError('could not find totalDurationMillis in: %s', json)\n",
    "        \n",
    "# Can be 'builds' (last 50) or 'allBuilds'.\n",
    "builds_key = 'allBuilds'  \n",
    "\n",
    "builds = []\n",
    "job_names = ['beam_PreCommit_Java_Cron', 'beam_PreCommit_Python_Cron', 'beam_PreCommit_Go_Cron']\n",
    "for job_name in job_names:\n",
    "    url = 'https://ci-beam.apache.org/job/%s/api/json' % job_name\n",
    "    params = {\n",
    "        'tree': '%s[result,number,timestamp,actions[queuingDurationMillis,totalDurationMillis]]' % builds_key}\n",
    "    r = requests.get(url, params=params)\n",
    "    data = r.json()\n",
    "    builds.extend([Build(job_name, build_json)\n",
    "                         for build_json in data[builds_key]])\n",
    "\n",
    "df = pd.DataFrame(builds)\n",
    "\n",
    "timestamp_cutoff = pd.Timestamp.utcnow().tz_convert(None) - pd.Timedelta(weeks=4)\n",
    "df_4weeks = df[df.timestamp >= timestamp_cutoff]\n",
    "timestamp_cutoff = pd.Timestamp.utcnow().tz_convert(None) - pd.Timedelta(weeks=1)\n",
    "df_1week = df[df.timestamp >= timestamp_cutoff]\n",
    "timestamp_cutoff = pd.Timestamp.utcnow().tz_convert(None) - pd.Timedelta(days=1)\n",
    "df_1day = df[df.timestamp >= timestamp_cutoff]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Graphs of precommit job times.\n",
    "\n",
    "for job_name in job_names:\n",
    "    duration_df = df_4weeks[df_4weeks.job_name == job_name]\n",
    "    duration_df = duration_df[['timestamp', 'queuingDurationMinutes', 'totalDurationMinutes']]\n",
    "    ax = duration_df.plot(x='timestamp')\n",
    "    ax.set_title(job_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get 95th percentile of precommit run times.\n",
    "test_dfs = {'4 weeks': df_4weeks, '1 week': df_1week, '1 day': df_1day}\n",
    "metrics = []\n",
    "\n",
    "for sample_time, test_df in test_dfs.items():\n",
    "    for job_name in job_names:\n",
    "        df_times = test_df[test_df.job_name == job_name]\n",
    "        for percentile in [95]:\n",
    "            total_all = np.percentile(df_times.totalDurationMinutes, q=percentile)\n",
    "            total_success = np.percentile(df_times[df_times.result == 'SUCCESS'].totalDurationMinutes,\n",
    "                                          q=percentile)\n",
    "            queue = np.percentile(df_times.queuingDurationMinutes, q=percentile)\n",
    "            metrics.append({'job_name': '%s %s %dth' % (\n",
    "                                job_name.replace('beam_PreCommit_','').replace('_GradleBuild',''),\n",
    "                                sample_time, percentile),\n",
    "                            'totalDurationMinutes_all': total_all,\n",
    "                            'totalDurationMinutes_success_only': total_success,\n",
    "                            'queuingDurationMinutes': queue,\n",
    "                           })\n",
    "\n",
    "pd.DataFrame(metrics).sort_values('job_name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch individual test data (precommit) from Jenkins.\n",
    "MAX_FETCH_PER_JOB_TYPE = 5\n",
    "\n",
    "test_results_raw = []\n",
    "for job_name in list(df.job_name.unique()):\n",
    "    if job_name == 'beam_PreCommit_Go_Cron':\n",
    "        # TODO: Go builds are missing testReport data on Jenkins.\n",
    "        continue\n",
    "    build_nums = list(df.number[df.job_name == job_name].unique())\n",
    "    num_fetched = 0\n",
    "    for build_num in build_nums:\n",
    "        url = 'https://ci-beam.apache.org/job/%s/%s/testReport/api/json?depth=1' % (job_name, build_num)\n",
    "        print('.', end='')\n",
    "        r = requests.get(url)\n",
    "        if not r.ok:\n",
    "            # Typically a 404 means that the job is still running.\n",
    "            print('skipping (%s): %s' % (r.status_code, url))\n",
    "            continue\n",
    "        raw_result = r.json()\n",
    "        raw_result['job_name'] = job_name\n",
    "        raw_result['build_num'] = build_num\n",
    "        test_results_raw.append(raw_result)\n",
    "        \n",
    "        num_fetched += 1\n",
    "        if num_fetched >= MAX_FETCH_PER_JOB_TYPE:\n",
    "            break\n",
    "\n",
    "print(' done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze individual test results.\n",
    "\n",
    "class TestResult(dict):\n",
    "    def __init__(self, job_name, build_num, json):\n",
    "        self['job_name'] = job_name\n",
    "        self['build_num'] = build_num\n",
    "        self['name'] = json['name']\n",
    "        self['duration'] = json['duration']\n",
    "        self['className'] = json['className']\n",
    "        self['status'] = json['status']\n",
    "\n",
    "test_results = []\n",
    "for test_result_raw in test_results_raw:\n",
    "    job_name = test_result_raw['job_name']\n",
    "    build_num = test_result_raw['build_num']\n",
    "    for suite in test_result_raw['suites']:\n",
    "        for case in suite['cases']:\n",
    "            test_results.append(TestResult(job_name, build_num, case))\n",
    "\n",
    "df_tests = pd.DataFrame(test_results)\n",
    "df_tests = df_tests.drop(columns=['build_num'])\n",
    "df_tests = df_tests.groupby(['className', 'job_name', 'name', 'status'], as_index=False).max()\n",
    "df_tests = df_tests.sort_values('duration', ascending=False)\n",
    "\n",
    "def filter_test_results(job_name, status):\n",
    "    res = df_tests\n",
    "    if job_name != 'all':\n",
    "        res = res[res.job_name == job_name]\n",
    "    if status != 'all':\n",
    "        res = res[res.status == status]\n",
    "    return res.head(n=20)\n",
    "\n",
    "from ipywidgets import interact\n",
    "interact(filter_test_results,\n",
    "         job_name=['all'] + list(df_tests.job_name.unique()),\n",
    "         status=['all'] + list(df_tests.status.unique()))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
